{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Infer schema directly, self correct and pick optimal query using CodeChat-Bison"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Imports "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from vertexai.language_models import CodeChatSession\n",
    "from vertexai.language_models import CodeChatModel\n",
    "from google.cloud import bigquery\n",
    "import pandas as pd\n",
    "import logging \n",
    "import time\n",
    "import os "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Setup logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "logger = logging.getLogger(__name__)\n",
    "logger.setLevel(logging.DEBUG)\n",
    "logger.addHandler(logging.StreamHandler())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Setup essentials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adjust display settings\n",
    "pd.set_option('display.max_colwidth', None)  # Set max column width to None to show all content\n",
    "pd.set_option('display.expand_frame_repr', False)  # Prevent truncation of DataFrame HTML representation\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "SERVICE_ACCOUNT_CREDENTIALS = './../credentials/vai-key.json'\n",
    "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SERVICE_ACCOUNT_CREDENTIALS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "PROJECT_ID = 'arun-genai-bb'\n",
    "MODEL_NAME = 'codechat-bison@latest'\n",
    "TEMPERATURE = 1 \n",
    "MAX_OUTPUT_TOKENS = 2048  # length of the output response | overridding the default value which is 128\n",
    "# TOP_P = 0.95  # default value\n",
    "# TOP_K = 40  # default value\n",
    "LOCATION = 'us-central1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATASET = 'flight_reservations'\n",
    "TABLES = ['customers', 'flights', 'reservations', 'transactions', 'loyality_points']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = CodeChatModel.from_pretrained(MODEL_NAME)\n",
    "bq_client = bigquery.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "    SELECT *\n",
      "    FROM `arun-genai-bb.flight_reservations.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS`\n",
      "    WHERE table_name in (\"customers\",\"flights\",\"reservations\",\"transactions\",\"loyality_points\")\n",
      "\n"
     ]
    }
   ],
   "source": [
    "query = f\"\"\"\n",
    "    SELECT *\n",
    "    FROM `{PROJECT_ID}.{DATASET}.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS`\n",
    "    WHERE table_name in ({','.join([f'\"{table}\"' for table in TABLES])})\n",
    "\"\"\"\n",
    "logger.info(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_catalog</th>\n",
       "      <th>table_schema</th>\n",
       "      <th>table_name</th>\n",
       "      <th>column_name</th>\n",
       "      <th>field_path</th>\n",
       "      <th>data_type</th>\n",
       "      <th>description</th>\n",
       "      <th>collation_name</th>\n",
       "      <th>rounding_mode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>transaction_id</td>\n",
       "      <td>transaction_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>amount</td>\n",
       "      <td>amount</td>\n",
       "      <td>FLOAT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>transaction_datetime</td>\n",
       "      <td>transaction_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>reservation_datetime</td>\n",
       "      <td>reservation_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>status</td>\n",
       "      <td>status</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>origin</td>\n",
       "      <td>origin</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>destination</td>\n",
       "      <td>destination</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>departure_datetime</td>\n",
       "      <td>departure_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>arrival_datetime</td>\n",
       "      <td>arrival_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>carrier</td>\n",
       "      <td>carrier</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>price</td>\n",
       "      <td>price</td>\n",
       "      <td>FLOAT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>first_name</td>\n",
       "      <td>first_name</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>last_name</td>\n",
       "      <td>last_name</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>email</td>\n",
       "      <td>email</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>date_of_birth</td>\n",
       "      <td>date_of_birth</td>\n",
       "      <td>DATE</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>created_at</td>\n",
       "      <td>created_at</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    table_catalog         table_schema    table_name           column_name            field_path data_type description collation_name rounding_mode\n",
       "0   arun-genai-bb  flight_reservations  transactions        transaction_id        transaction_id     INT64        None           NULL          None\n",
       "1   arun-genai-bb  flight_reservations  transactions        reservation_id        reservation_id     INT64        None           NULL          None\n",
       "2   arun-genai-bb  flight_reservations  transactions                amount                amount   FLOAT64        None           NULL          None\n",
       "3   arun-genai-bb  flight_reservations  transactions  transaction_datetime  transaction_datetime  DATETIME        None           NULL          None\n",
       "4   arun-genai-bb  flight_reservations  reservations        reservation_id        reservation_id     INT64        None           NULL          None\n",
       "5   arun-genai-bb  flight_reservations  reservations           customer_id           customer_id     INT64        None           NULL          None\n",
       "6   arun-genai-bb  flight_reservations  reservations             flight_id             flight_id     INT64        None           NULL          None\n",
       "7   arun-genai-bb  flight_reservations  reservations  reservation_datetime  reservation_datetime  DATETIME        None           NULL          None\n",
       "8   arun-genai-bb  flight_reservations  reservations                status                status    STRING        None           NULL          None\n",
       "9   arun-genai-bb  flight_reservations       flights             flight_id             flight_id     INT64        None           NULL          None\n",
       "10  arun-genai-bb  flight_reservations       flights                origin                origin    STRING        None           NULL          None\n",
       "11  arun-genai-bb  flight_reservations       flights           destination           destination    STRING        None           NULL          None\n",
       "12  arun-genai-bb  flight_reservations       flights    departure_datetime    departure_datetime  DATETIME        None           NULL          None\n",
       "13  arun-genai-bb  flight_reservations       flights      arrival_datetime      arrival_datetime  DATETIME        None           NULL          None\n",
       "14  arun-genai-bb  flight_reservations       flights               carrier               carrier    STRING        None           NULL          None\n",
       "15  arun-genai-bb  flight_reservations       flights                 price                 price   FLOAT64        None           NULL          None\n",
       "16  arun-genai-bb  flight_reservations     customers           customer_id           customer_id     INT64        None           NULL          None\n",
       "17  arun-genai-bb  flight_reservations     customers            first_name            first_name    STRING        None           NULL          None\n",
       "18  arun-genai-bb  flight_reservations     customers             last_name             last_name    STRING        None           NULL          None\n",
       "19  arun-genai-bb  flight_reservations     customers                 email                 email    STRING        None           NULL          None\n",
       "20  arun-genai-bb  flight_reservations     customers         date_of_birth         date_of_birth      DATE        None           NULL          None\n",
       "21  arun-genai-bb  flight_reservations     customers            created_at            created_at  DATETIME        None           NULL          None"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "schema_columns = bq_client.query(query=query).to_dataframe()\n",
    "schema_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "| table_catalog   | table_schema        | table_name   | column_name          | field_path           | data_type   | description   | collation_name   | rounding_mode   |\n",
      "|:----------------|:--------------------|:-------------|:---------------------|:---------------------|:------------|:--------------|:-----------------|:----------------|\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_id       | transaction_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | amount               | amount               | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_datetime | transaction_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_datetime | reservation_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | status               | status               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | origin               | origin               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | destination          | destination          | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | departure_datetime   | departure_datetime   | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | arrival_datetime     | arrival_datetime     | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | carrier              | carrier              | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | price                | price                | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | first_name           | first_name           | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | last_name            | last_name            | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | email                | email                | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | date_of_birth        | date_of_birth        | DATE        |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | created_at           | created_at           | DATETIME    |               | NULL             |                 |\n"
     ]
    }
   ],
   "source": [
    "schema_columns = schema_columns.to_markdown(index=False)\n",
    "logger.info(schema_columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Utility function to convert text into SQL and automatically correct the SQL query if any execution errors occur. <br>\n",
    "The function also collects the successful query executions, ranks them by latency of execution, and returns the fastest. <br>\n",
    "Additionally, it provides an option to retrieve all the successful queries and their latencies as a dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_and_execute_sql(prompt, max_tries=5, return_all=False):\n",
    "    \"\"\"\n",
    "    Generate an SQL query using the code_gen_model, execute it using bq_client, and rank successful queries by latency.\n",
    "    \n",
    "    Args:\n",
    "    - prompt (str): Prompt to provide to the model for generating SQL.\n",
    "    - max_tries (int): Maximum number of attempts to generate and execute SQL.\n",
    "    - return_all (bool): Flag to determine whether to return all successful queries or only the fastest.\n",
    "    \n",
    "    Returns:\n",
    "    - dict: A dictionary containing the fastest dataframe or all successful dataframes, or error messages and prompt evolution.\n",
    "    \"\"\"\n",
    "    \n",
    "    tries = 0\n",
    "    error_messages = []\n",
    "    prompts = [prompt]\n",
    "    successful_queries = []\n",
    "\n",
    "    chat_session = CodeChatSession(model=model, \n",
    "                                   temperature=TEMPERATURE, \n",
    "                                   max_output_tokens=MAX_OUTPUT_TOKENS)\n",
    "    \n",
    "    while tries < max_tries:\n",
    "        logger.info(f'TRIAL: {tries+1}')\n",
    "        try:\n",
    "            # Predict SQL using the model\n",
    "            start_time = time.time()\n",
    "            response = chat_session.send_message(prompt, temperature=TEMPERATURE, max_output_tokens=MAX_OUTPUT_TOKENS)\n",
    "            generated_sql_query = response.text\n",
    "            generated_sql_query = '\\n'.join(generated_sql_query.split('\\n')[1:-1])\n",
    "            logger.info('-' * 50)\n",
    "            logger.info(generated_sql_query)\n",
    "            logger.info('-' * 50)\n",
    "            # Execute SQL using BigQuery client\n",
    "            df = bq_client.query(generated_sql_query).to_dataframe()\n",
    "            latency = time.time() - start_time\n",
    "            successful_queries.append({\n",
    "                \"query\": generated_sql_query,\n",
    "                \"dataframe\": df,\n",
    "                \"latency\": latency\n",
    "            })\n",
    "            logger.info('SUCCEEDED')\n",
    "            # Evolve the prompt for success path to optimize the last successful query for latency\n",
    "            if len(successful_queries) > 1:\n",
    "                prompt = f\"\"\"Modify the last successful SQL query by making changes to it and optimizing it for latency. \n",
    "            ENSURE that the NEW QUERY is DIFFERENT from the previous one while prioritizing faster execution. \n",
    "            The last successful query was:\n",
    "            {successful_queries[-1][\"query\"]}\"\"\"\n",
    "        except Exception as e:\n",
    "            logger.error('FAILED')\n",
    "            # Catch the error, store the message, and try again\n",
    "            msg = str(e)\n",
    "            error_messages.append(msg)\n",
    "            # Evolve the prompt by appending the error message and asking the model to correct it\n",
    "            prompt = f\"\"\"Encountered an error: {msg}. \n",
    "To address this, please generate an alternative SQL query response that avoids this specific error. \n",
    "Follow the instructions mentioned above to remediate the error. \n",
    "\n",
    "Modify the below SQL query to resolve the issue and ensure it is not a repetition of all previously generated queries.\n",
    "{generated_sql_query}\n",
    " \n",
    "Ensure the revised SQL query aligns precisely with the requirements outlined in the initial question.\n",
    "Additionally, please optimize the query for latency while maintaining correctness and efficiency.\"\"\"\n",
    "            prompts.append(prompt)\n",
    "        logger.info('=' * 100)\n",
    "        tries += 1\n",
    "    # If no successful queries\n",
    "    if len(successful_queries) == 0:\n",
    "        return {\n",
    "            \"error\": \"All attempts exhausted.\",\n",
    "            \"prompts\": prompts,\n",
    "            \"errors\": error_messages\n",
    "        }\n",
    "    \n",
    "    # Sort successful queries by latency\n",
    "    successful_queries.sort(key=lambda x: x['latency'])\n",
    "    \n",
    "    if return_all:\n",
    "        df = pd.DataFrame([(q[\"query\"], q[\"dataframe\"], q[\"latency\"]) for q in successful_queries], columns=[\"Query\", \"Result\", \"Latency\"])\n",
    "        return {\n",
    "            \"dataframe\": df\n",
    "        }\n",
    "    else:\n",
    "        return {\n",
    "            \"fastest_query\": successful_queries[0][\"query\"],\n",
    "            \"result\": successful_queries[0][\"dataframe\"],\n",
    "            \"latency\": successful_queries[0][\"latency\"]\n",
    "        }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test text to SQL scenarios"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Construct the SEED prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Please craft a SQL query for BigQuery that addresses the following QUESTION provided below. \n",
      "Ensure you reference the appropriate BigQuery tables and column names provided in the SCHEMA below. \n",
      "When joining tables, employ type coercion to guarantee data type consistency for the join columns. \n",
      "Additionally, the output column names should specify units where applicable.\n",
      "\n",
      "QUESTION:\n",
      "{}\n",
      "\n",
      "SCHEMA:\n",
      "{}\n",
      "\n",
      "IMPORTANT: \n",
      "Use ONLY DATETIME and DO NOT use TIMESTAMP.\n",
      "--\n",
      "Ensure your SQL query accurately defines both the start and end of the DATETIME range.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "seed_prompt = \"\"\"\n",
    "Please craft a SQL query for BigQuery that addresses the following QUESTION provided below. \n",
    "Ensure you reference the appropriate BigQuery tables and column names provided in the SCHEMA below. \n",
    "When joining tables, employ type coercion to guarantee data type consistency for the join columns. \n",
    "Additionally, the output column names should specify units where applicable.\\n\n",
    "QUESTION:\n",
    "{}\\n\n",
    "SCHEMA:\n",
    "{}\\n\n",
    "IMPORTANT: \n",
    "Use ONLY DATETIME and DO NOT use TIMESTAMP.\n",
    "--\n",
    "Ensure your SQL query accurately defines both the start and end of the DATETIME range.\n",
    "\"\"\"\n",
    "logger.info(seed_prompt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Scenario 1: Retrieve Active Reservations for a Specific Date Range"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For this scenario, you want to find all active reservations within a specific date range."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Provide a list of all flight reservations from October 10th to October 15th, 2023\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Please craft a SQL query for BigQuery that addresses the following QUESTION provided below. \n",
      "Ensure you reference the appropriate BigQuery tables and column names provided in the SCHEMA below. \n",
      "When joining tables, employ type coercion to guarantee data type consistency for the join columns. \n",
      "Additionally, the output column names should specify units where applicable.\n",
      "\n",
      "QUESTION:\n",
      "Provide a list of all flight reservations from October 10th to October 15th, 2023\n",
      "\n",
      "SCHEMA:\n",
      "| table_catalog   | table_schema        | table_name   | column_name          | field_path           | data_type   | description   | collation_name   | rounding_mode   |\n",
      "|:----------------|:--------------------|:-------------|:---------------------|:---------------------|:------------|:--------------|:-----------------|:----------------|\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_id       | transaction_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | amount               | amount               | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_datetime | transaction_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_datetime | reservation_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | status               | status               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | origin               | origin               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | destination          | destination          | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | departure_datetime   | departure_datetime   | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | arrival_datetime     | arrival_datetime     | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | carrier              | carrier              | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | price                | price                | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | first_name           | first_name           | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | last_name            | last_name            | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | email                | email                | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | date_of_birth        | date_of_birth        | DATE        |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | created_at           | created_at           | DATETIME    |               | NULL             |                 |\n",
      "\n",
      "IMPORTANT: \n",
      "Use ONLY DATETIME and DO NOT use TIMESTAMP.\n",
      "--\n",
      "Ensure your SQL query accurately defines both the start and end of the DATETIME range.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)\n",
    "logger.info(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM arun-genai-bb.flight_reservations.reservations r\n",
      "JOIN arun-genai-bb.flight_reservations.flights f\n",
      "ON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\n",
      "WHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM arun-genai-bb.flight_reservations.reservations r\n",
      "JOIN arun-genai-bb.flight_reservations.flights f\n",
      "ON r.flight_id = f.flight_id\n",
      "WHERE r.reservation_datetime BETWEEN TIMESTAMP('2023-10-10 00:00:00') AND TIMESTAMP('2023-10-15 23:59:59');\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM arun-genai-bb.flight_reservations.reservations r\n",
      "JOIN arun-genai-bb.flight_reservations.flights f\n",
      "ON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\n",
      "WHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM arun-genai-bb.flight_reservations.reservations r\n",
      "JOIN arun-genai-bb.flight_reservations.flights f\n",
      "ON r.flight_id = f.flight_id\n",
      "WHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "ORDER BY r.reservation_datetime ASC;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM arun-genai-bb.flight_reservations.reservations r\n",
      "JOIN arun-genai-bb.flight_reservations.flights f\n",
      "ON r.flight_id = f.flight_id\n",
      "WHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "ORDER BY r.reservation_datetime ASC\n",
      "LIMIT 1000;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 312 ms, sys: 65.8 ms, total: 378 ms\n",
      "Wall time: 1min 29s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59';</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>16.119379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59';</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>19.265791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON r.flight_id = f.flight_id\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY r.reservation_datetime ASC;</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>21.717461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON r.flight_id = f.flight_id\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY r.reservation_datetime ASC\\nLIMIT 1000;</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>22.154007</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Query                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Result    Latency\n",
       "0                   SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59';     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0  16.119379\n",
       "1                   SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59';     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0  19.265791\n",
       "2              SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON r.flight_id = f.flight_id\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY r.reservation_datetime ASC;     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0  21.717461\n",
       "3  SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON r.flight_id = f.flight_id\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY r.reservation_datetime ASC\\nLIMIT 1000;     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0  22.154007"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>reservation_id</th>\n",
       "      <th>customer_id</th>\n",
       "      <th>flight_id</th>\n",
       "      <th>reservation_datetime</th>\n",
       "      <th>status</th>\n",
       "      <th>origin</th>\n",
       "      <th>destination</th>\n",
       "      <th>departure_datetime</th>\n",
       "      <th>arrival_datetime</th>\n",
       "      <th>carrier</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>2023-10-10 10:00:00</td>\n",
       "      <td>Confirmed</td>\n",
       "      <td>SEA</td>\n",
       "      <td>JFK</td>\n",
       "      <td>2023-11-25 06:00:00</td>\n",
       "      <td>2023-11-25 14:30:00</td>\n",
       "      <td>United</td>\n",
       "      <td>550.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>2023-10-12 11:30:00</td>\n",
       "      <td>Confirmed</td>\n",
       "      <td>JFK</td>\n",
       "      <td>MIA</td>\n",
       "      <td>2023-11-27 20:00:00</td>\n",
       "      <td>2023-11-27 23:30:00</td>\n",
       "      <td>American</td>\n",
       "      <td>380.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>2023-10-15 13:20:00</td>\n",
       "      <td>Confirmed</td>\n",
       "      <td>MIA</td>\n",
       "      <td>JFK</td>\n",
       "      <td>2023-11-30 10:00:00</td>\n",
       "      <td>2023-11-30 13:30:00</td>\n",
       "      <td>American</td>\n",
       "      <td>380.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Scenario 2: Identify customers who made reservations in the past N days."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Identify all customers who have made flight reservations within the last 7 days.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  c.email,\n",
      "  t.transaction_datetime AS reservation_datetime\n",
      "FROM arun-genai-bb.flight_reservations.transactions t\n",
      "JOIN arun-genai-bb.flight_reservations.reservations r\n",
      "ON t.reservation_id = r.reservation_id\n",
      "JOIN arun-genai-bb.flight_reservations.customers c\n",
      "ON r.customer_id = c.customer_id\n",
      "WHERE t.transaction_datetime >= DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY)\n",
      "AND t.transaction_datetime < CURRENT_DATETIME()\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  c.email,\n",
      "  t.transaction_datetime AS reservation_datetime\n",
      "FROM arun-genai-bb.flight_reservations.transactions t\n",
      "JOIN arun-genai-bb.flight_reservations.reservations r\n",
      "ON t.reservation_id = r.reservation_id\n",
      "JOIN arun-genai-bb.flight_reservations.customers c\n",
      "ON r.customer_id = c.customer_id\n",
      "WHERE t.transaction_datetime >= DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY)\n",
      "AND t.transaction_datetime < DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  c.email,\n",
      "  t.transaction_datetime AS reservation_datetime\n",
      "FROM arun-genai-bb.flight_reservations.transactions t\n",
      "JOIN arun-genai-bb.flight_reservations.reservations r\n",
      "ON t.reservation_id = r.reservation_id\n",
      "JOIN arun-genai-bb.flight_reservations.customers c\n",
      "ON r.customer_id = c.customer_id\n",
      "WHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "WITH t1 AS (\n",
      "  SELECT\n",
      "    t.reservation_id,\n",
      "    t.transaction_datetime\n",
      "  FROM arun-genai-bb.flight_reservations.transactions t\n",
      "  WHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)\n",
      ")\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  c.email,\n",
      "  t.transaction_datetime AS reservation_datetime\n",
      "FROM t1\n",
      "JOIN arun-genai-bb.flight_reservations.reservations r\n",
      "ON t1.reservation_id = r.reservation_id\n",
      "JOIN arun-genai-bb.flight_reservations.customers c\n",
      "ON r.customer_id = c.customer_id\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "WITH t1 AS (\n",
      "  SELECT\n",
      "    reservation_id,\n",
      "    transaction_datetime\n",
      "  FROM arun-genai-bb.flight_reservations.transactions\n",
      "  WHERE transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)\n",
      ")\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  c.email,\n",
      "  t.transaction_datetime AS reservation_datetime\n",
      "FROM t1\n",
      "JOIN arun-genai-bb.flight_reservations.reservations r\n",
      "ON t1.reservation_id = r.reservation_id\n",
      "JOIN arun-genai-bb.flight_reservations.customers c\n",
      "ON r.customer_id = c.customer_id\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 224 ms, sys: 59.8 ms, total: 284 ms\n",
      "Wall time: 1min 20s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  c.email,\\n  t.transaction_datetime AS reservation_datetime\\nFROM arun-genai-bb.flight_reservations.transactions t\\nJOIN arun-genai-bb.flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN arun-genai-bb.flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "2           15       Mary         Jane  mary.j@example.com  2023-11-04 10:46:00</td>\n",
       "      <td>15.924800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  c.email,\\n  t.transaction_datetime AS reservation_datetime\\nFROM arun-genai-bb.flight_reservations.transactions t\\nJOIN arun-genai-bb.flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN arun-genai-bb.flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime &gt;= DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY)\\nAND t.transaction_datetime &lt; CURRENT_DATETIME()</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00</td>\n",
       "      <td>18.279402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  c.email,\\n  t.transaction_datetime AS reservation_datetime\\nFROM arun-genai-bb.flight_reservations.transactions t\\nJOIN arun-genai-bb.flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN arun-genai-bb.flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime &gt;= DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY)\\nAND t.transaction_datetime &lt; DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "2           15       Mary         Jane  mary.j@example.com  2023-11-04 10:46:00</td>\n",
       "      <td>22.835325</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Query                                                                                                                                                                                                                                                                                                                           Result    Latency\n",
       "0                       SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  c.email,\\n  t.transaction_datetime AS reservation_datetime\\nFROM arun-genai-bb.flight_reservations.transactions t\\nJOIN arun-genai-bb.flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN arun-genai-bb.flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)     customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "2           15       Mary         Jane  mary.j@example.com  2023-11-04 10:46:00  15.924800\n",
       "1                            SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  c.email,\\n  t.transaction_datetime AS reservation_datetime\\nFROM arun-genai-bb.flight_reservations.transactions t\\nJOIN arun-genai-bb.flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN arun-genai-bb.flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime >= DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY)\\nAND t.transaction_datetime < CURRENT_DATETIME()                                                                                     customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00  18.279402\n",
       "2  SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  c.email,\\n  t.transaction_datetime AS reservation_datetime\\nFROM arun-genai-bb.flight_reservations.transactions t\\nJOIN arun-genai-bb.flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN arun-genai-bb.flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime >= DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY)\\nAND t.transaction_datetime < DATE_ADD(CURRENT_DATETIME(), INTERVAL 1 DAY)     customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "2           15       Mary         Jane  mary.j@example.com  2023-11-04 10:46:00  22.835325"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>email</th>\n",
       "      <th>reservation_datetime</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11</td>\n",
       "      <td>Ian</td>\n",
       "      <td>Somerhalder</td>\n",
       "      <td>ian.s@example.com</td>\n",
       "      <td>2023-10-28 17:11:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13</td>\n",
       "      <td>Kate</td>\n",
       "      <td>Winslet</td>\n",
       "      <td>kate.w@example.com</td>\n",
       "      <td>2023-11-02 08:21:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15</td>\n",
       "      <td>Mary</td>\n",
       "      <td>Jane</td>\n",
       "      <td>mary.j@example.com</td>\n",
       "      <td>2023-11-04 10:46:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "2           15       Mary         Jane  mary.j@example.com  2023-11-04 10:46:00"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 3: Calculate Monthly Revenue\n",
    "Calculate the total revenue generated from transactions for a given month and year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Calculate the total revenue generated from transactions in October 2023, specifically from all reservations with a Confirmed status.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = CAST(reservations.reservation_id AS INT64)\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime >= '2023-10-01 00:00:00'\n",
      "  AND transactions.transaction_datetime < '2023-11-01 00:00:00';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN TIMESTAMP('2023-10-01 00:00:00') AND TIMESTAMP('2023-11-01 00:00:00');\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 289 ms, sys: 82.4 ms, total: 371 ms\n",
      "Wall time: 1min 32s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime &gt;= '2023-10-01 00:00:00'\\n  AND transactions.transaction_datetime &lt; '2023-11-01 00:00:00';</td>\n",
       "      <td>total_revenue_usd\n",
       "0             3860.0</td>\n",
       "      <td>15.278415</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';</td>\n",
       "      <td>total_revenue_usd\n",
       "0             3860.0</td>\n",
       "      <td>19.683552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = CAST(reservations.reservation_id AS INT64)\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';</td>\n",
       "      <td>total_revenue_usd\n",
       "0             3860.0</td>\n",
       "      <td>19.699193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';</td>\n",
       "      <td>total_revenue_usd\n",
       "0             3860.0</td>\n",
       "      <td>24.675394</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                      Query                                     Result    Latency\n",
       "0  SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime >= '2023-10-01 00:00:00'\\n  AND transactions.transaction_datetime < '2023-11-01 00:00:00';     total_revenue_usd\n",
       "0             3860.0  15.278415\n",
       "1                                    SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';     total_revenue_usd\n",
       "0             3860.0  19.683552\n",
       "2                     SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = CAST(reservations.reservation_id AS INT64)\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';     total_revenue_usd\n",
       "0             3860.0  19.699193\n",
       "3                                    SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';     total_revenue_usd\n",
       "0             3860.0  24.675394"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_revenue_usd</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3860.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_revenue_usd\n",
       "0             3860.0"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 4: Popular Flight Times\n",
    "Identify the most popular departure hours or days for a given day or month or year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Determine the departure months with the highest frequency for the year 2023.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\n",
      "       COUNT(DISTINCT transaction_id) AS num_transactions\n",
      "FROM arun-genai-bb.flight_reservations.transactions\n",
      "WHERE DATE_TRUNC(transaction_datetime, YEAR) = '2023'\n",
      "GROUP BY departure_month\n",
      "ORDER BY num_transactions DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\n",
      "       COUNT(DISTINCT transaction_id) AS num_transactions\n",
      "FROM arun-genai-bb.flight_reservations.transactions\n",
      "WHERE YEAR(transaction_datetime) = 2023\n",
      "GROUP BY departure_month\n",
      "ORDER BY num_transactions DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\n",
      "       COUNT(DISTINCT transaction_id) AS num_transactions\n",
      "FROM arun-genai-bb.flight_reservations.transactions\n",
      "WHERE EXTRACT(YEAR FROM transaction_datetime) = 2023\n",
      "GROUP BY departure_month\n",
      "ORDER BY num_transactions DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\n",
      "       COUNT(DISTINCT transaction_id) AS num_transactions\n",
      "FROM arun-genai-bb.flight_reservations.transactions\n",
      "WHERE transaction_datetime BETWEEN '2023-01-01' AND '2023-12-31'\n",
      "GROUP BY departure_month\n",
      "ORDER BY num_transactions DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\n",
      "       COUNT(DISTINCT transaction_id) AS num_transactions\n",
      "FROM (\n",
      "  SELECT transaction_datetime, transaction_id\n",
      "  FROM arun-genai-bb.flight_reservations.transactions\n",
      "  WHERE transaction_datetime BETWEEN '2023-01-01' AND '2023-12-31'\n",
      ") AS t\n",
      "GROUP BY departure_month\n",
      "ORDER BY num_transactions DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 229 ms, sys: 65.2 ms, total: 294 ms\n",
      "Wall time: 1min 8s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\\n       COUNT(DISTINCT transaction_id) AS num_transactions\\nFROM arun-genai-bb.flight_reservations.transactions\\nWHERE EXTRACT(YEAR FROM transaction_datetime) = 2023\\nGROUP BY departure_month\\nORDER BY num_transactions DESC\\nLIMIT 10;</td>\n",
       "      <td>departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6</td>\n",
       "      <td>14.605921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\\n       COUNT(DISTINCT transaction_id) AS num_transactions\\nFROM (\\n  SELECT transaction_datetime, transaction_id\\n  FROM arun-genai-bb.flight_reservations.transactions\\n  WHERE transaction_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\n) AS t\\nGROUP BY departure_month\\nORDER BY num_transactions DESC\\nLIMIT 10;</td>\n",
       "      <td>departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6</td>\n",
       "      <td>15.977074</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\\n       COUNT(DISTINCT transaction_id) AS num_transactions\\nFROM arun-genai-bb.flight_reservations.transactions\\nWHERE transaction_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\nGROUP BY departure_month\\nORDER BY num_transactions DESC\\nLIMIT 10;</td>\n",
       "      <td>departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6</td>\n",
       "      <td>20.122611</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                          Query                                                                                                       Result    Latency\n",
       "0                                                                                 SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\\n       COUNT(DISTINCT transaction_id) AS num_transactions\\nFROM arun-genai-bb.flight_reservations.transactions\\nWHERE EXTRACT(YEAR FROM transaction_datetime) = 2023\\nGROUP BY departure_month\\nORDER BY num_transactions DESC\\nLIMIT 10;    departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6  14.605921\n",
       "1  SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\\n       COUNT(DISTINCT transaction_id) AS num_transactions\\nFROM (\\n  SELECT transaction_datetime, transaction_id\\n  FROM arun-genai-bb.flight_reservations.transactions\\n  WHERE transaction_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\n) AS t\\nGROUP BY departure_month\\nORDER BY num_transactions DESC\\nLIMIT 10;    departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6  15.977074\n",
       "2                                                                     SELECT DATE_TRUNC(transaction_datetime, MONTH) AS departure_month,\\n       COUNT(DISTINCT transaction_id) AS num_transactions\\nFROM arun-genai-bb.flight_reservations.transactions\\nWHERE transaction_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\nGROUP BY departure_month\\nORDER BY num_transactions DESC\\nLIMIT 10;    departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6  20.122611"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>departure_month</th>\n",
       "      <th>num_transactions</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-10-01</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-11-01</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  departure_month  num_transactions\n",
       "0      2023-10-01                10\n",
       "1      2023-11-01                 6"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 5: Customer Age Group\n",
    "Group customers by age brackets and count the number in each bracket."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Group customers into five distinct age brackets and count the number of customers in each bracket.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "WITH customer_age AS (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    DATE_DIFF(CURRENT_DATE(), date_of_birth, YEAR) AS age\n",
      "  FROM\n",
      "    `arun-genai-bb.flight_reservations.customers`\n",
      ")\n",
      "\n",
      "SELECT\n",
      "  CASE\n",
      "    WHEN age < 20 THEN '0-19'\n",
      "    WHEN age < 30 THEN '20-29'\n",
      "    WHEN age < 40 THEN '30-39'\n",
      "    WHEN age < 50 THEN '40-49'\n",
      "    ELSE '50+'\n",
      "  END AS age_bracket,\n",
      "  COUNT(customer_id) AS num_customers\n",
      "FROM\n",
      "  customer_age\n",
      "GROUP BY\n",
      "  1\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "WITH customer_age AS (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    DATE_DIFF(CURRENT_DATE(), date_of_birth, YEAR) AS age\n",
      "  FROM\n",
      "    `arun-genai-bb.flight_reservations.customers`\n",
      ")\n",
      "\n",
      "SELECT\n",
      "  CASE\n",
      "    WHEN age < 20 THEN '0-19'\n",
      "    WHEN age < 30 THEN '20-29'\n",
      "    WHEN age < 40 THEN '30-39'\n",
      "    WHEN age < 50 THEN '40-49'\n",
      "    ELSE '50+'\n",
      "  END AS age_bracket,\n",
      "  COUNT(customer_id) AS num_customers\n",
      "FROM\n",
      "  customer_age\n",
      "GROUP BY\n",
      "  1\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "WITH customer_age AS (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\n",
      "  FROM\n",
      "    `arun-genai-bb.flight_reservations.customers`\n",
      ")\n",
      "\n",
      "SELECT\n",
      "  CASE\n",
      "    WHEN age < 20 THEN '0-19'\n",
      "    WHEN age < 30 THEN '20-29'\n",
      "    WHEN age < 40 THEN '30-39'\n",
      "    WHEN age < 50 THEN '40-49'\n",
      "    ELSE '50+'\n",
      "  END AS age_bracket,\n",
      "  COUNT(customer_id) AS num_customers\n",
      "FROM\n",
      "  customer_age\n",
      "GROUP BY\n",
      "  1\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "WITH customer_age AS (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\n",
      "  FROM\n",
      "    `arun-genai-bb.flight_reservations.customers`\n",
      ")\n",
      "\n",
      "SELECT\n",
      "  age_bracket,\n",
      "  COUNT(customer_id) AS num_customers\n",
      "FROM (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    CASE\n",
      "      WHEN age < 20 THEN '0-19'\n",
      "      WHEN age < 30 THEN '20-29'\n",
      "      WHEN age < 40 THEN '30-39'\n",
      "      WHEN age < 50 THEN '40-49'\n",
      "      ELSE '50+'\n",
      "    END AS age_bracket\n",
      "  FROM\n",
      "    customer_age\n",
      ") AS age_brackets\n",
      "GROUP BY\n",
      "  1\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "WITH customer_age AS (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\n",
      "  FROM\n",
      "    `arun-genai-bb.flight_reservations.customers`\n",
      ")\n",
      "\n",
      "SELECT\n",
      "  age_bracket,\n",
      "  COUNT(customer_id) AS num_customers\n",
      "FROM (\n",
      "  SELECT\n",
      "    customer_id,\n",
      "    CASE\n",
      "      WHEN age < 20 THEN '0-19'\n",
      "      WHEN age < 30 THEN '20-29'\n",
      "      WHEN age < 40 THEN '30-39'\n",
      "      WHEN age < 50 THEN '40-49'\n",
      "      ELSE '50+'\n",
      "    END AS age_bracket\n",
      "  FROM\n",
      "    customer_age\n",
      ") AS age_brackets\n",
      "GROUP BY\n",
      "  1\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 311 ms, sys: 73.1 ms, total: 385 ms\n",
      "Wall time: 1min 21s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM (\\n  SELECT\\n    customer_id,\\n    CASE\\n      WHEN age &lt; 20 THEN '0-19'\\n      WHEN age &lt; 30 THEN '20-29'\\n      WHEN age &lt; 40 THEN '30-39'\\n      WHEN age &lt; 50 THEN '40-49'\\n      ELSE '50+'\\n    END AS age_bracket\\n  FROM\\n    customer_age\\n) AS age_brackets\\nGROUP BY\\n  1</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6</td>\n",
       "      <td>14.066733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM (\\n  SELECT\\n    customer_id,\\n    CASE\\n      WHEN age &lt; 20 THEN '0-19'\\n      WHEN age &lt; 30 THEN '20-29'\\n      WHEN age &lt; 40 THEN '30-39'\\n      WHEN age &lt; 50 THEN '40-49'\\n      ELSE '50+'\\n    END AS age_bracket\\n  FROM\\n    customer_age\\n) AS age_brackets\\nGROUP BY\\n  1</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6</td>\n",
       "      <td>14.597461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    DATE_DIFF(CURRENT_DATE(), date_of_birth, YEAR) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  CASE\\n    WHEN age &lt; 20 THEN '0-19'\\n    WHEN age &lt; 30 THEN '20-29'\\n    WHEN age &lt; 40 THEN '30-39'\\n    WHEN age &lt; 50 THEN '40-49'\\n    ELSE '50+'\\n  END AS age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM\\n  customer_age\\nGROUP BY\\n  1</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6</td>\n",
       "      <td>15.084443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  CASE\\n    WHEN age &lt; 20 THEN '0-19'\\n    WHEN age &lt; 30 THEN '20-29'\\n    WHEN age &lt; 40 THEN '30-39'\\n    WHEN age &lt; 50 THEN '40-49'\\n    ELSE '50+'\\n  END AS age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM\\n  customer_age\\nGROUP BY\\n  1</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6</td>\n",
       "      <td>15.687930</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    DATE_DIFF(CURRENT_DATE(), date_of_birth, YEAR) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  CASE\\n    WHEN age &lt; 20 THEN '0-19'\\n    WHEN age &lt; 30 THEN '20-29'\\n    WHEN age &lt; 40 THEN '30-39'\\n    WHEN age &lt; 50 THEN '40-49'\\n    ELSE '50+'\\n  END AS age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM\\n  customer_age\\nGROUP BY\\n  1</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6</td>\n",
       "      <td>22.166036</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Query                                                                                                                                            Result    Latency\n",
       "0  WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM (\\n  SELECT\\n    customer_id,\\n    CASE\\n      WHEN age < 20 THEN '0-19'\\n      WHEN age < 30 THEN '20-29'\\n      WHEN age < 40 THEN '30-39'\\n      WHEN age < 50 THEN '40-49'\\n      ELSE '50+'\\n    END AS age_bracket\\n  FROM\\n    customer_age\\n) AS age_brackets\\nGROUP BY\\n  1    age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6  14.066733\n",
       "1  WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM (\\n  SELECT\\n    customer_id,\\n    CASE\\n      WHEN age < 20 THEN '0-19'\\n      WHEN age < 30 THEN '20-29'\\n      WHEN age < 40 THEN '30-39'\\n      WHEN age < 50 THEN '40-49'\\n      ELSE '50+'\\n    END AS age_bracket\\n  FROM\\n    customer_age\\n) AS age_brackets\\nGROUP BY\\n  1    age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6  14.597461\n",
       "2                                                                                                                WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    DATE_DIFF(CURRENT_DATE(), date_of_birth, YEAR) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  CASE\\n    WHEN age < 20 THEN '0-19'\\n    WHEN age < 30 THEN '20-29'\\n    WHEN age < 40 THEN '30-39'\\n    WHEN age < 50 THEN '40-49'\\n    ELSE '50+'\\n  END AS age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM\\n  customer_age\\nGROUP BY\\n  1    age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6  15.084443\n",
       "3                                                                                          WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM date_of_birth) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  CASE\\n    WHEN age < 20 THEN '0-19'\\n    WHEN age < 30 THEN '20-29'\\n    WHEN age < 40 THEN '30-39'\\n    WHEN age < 50 THEN '40-49'\\n    ELSE '50+'\\n  END AS age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM\\n  customer_age\\nGROUP BY\\n  1    age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6  15.687930\n",
       "4                                                                                                                WITH customer_age AS (\\n  SELECT\\n    customer_id,\\n    DATE_DIFF(CURRENT_DATE(), date_of_birth, YEAR) AS age\\n  FROM\\n    `arun-genai-bb.flight_reservations.customers`\\n)\\n\\nSELECT\\n  CASE\\n    WHEN age < 20 THEN '0-19'\\n    WHEN age < 30 THEN '20-29'\\n    WHEN age < 40 THEN '30-39'\\n    WHEN age < 50 THEN '40-49'\\n    ELSE '50+'\\n  END AS age_bracket,\\n  COUNT(customer_id) AS num_customers\\nFROM\\n  customer_age\\nGROUP BY\\n  1    age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6  22.166036"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age_bracket</th>\n",
       "      <th>num_customers</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30-39</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>40-49</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20-29</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>50+</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  age_bracket  num_customers\n",
       "0       30-39              6\n",
       "1       40-49              5\n",
       "2       20-29              3\n",
       "3         50+              6"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 6: Age Calculation\n",
    "Calculate the age of customers based on their date of birth and filter those who are above X years old."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Identify and rank all customers aged 18+ who have `Confirmed` reservations for the current month, ordered by their age. Make sure to display their ages in the result.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  DATEDIFF(CURRENT_DATE(), c.date_of_birth) AS age,\n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM flight_reservations.customers c\n",
      "JOIN flight_reservations.reservations r\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE r.status = 'Confirmed'\n",
      "AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\n",
      "AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\n",
      "AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age,\n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM flight_reservations.customers c\n",
      "JOIN flight_reservations.reservations r\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE r.status = 'Confirmed'\n",
      "AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\n",
      "AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\n",
      "AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  c.customer_id,\n",
      "  c.first_name,\n",
      "  c.last_name,\n",
      "  EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM flight_reservations.customers c\n",
      "JOIN flight_reservations.reservations r\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE r.status = 'Confirmed'\n",
      "AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\n",
      "AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\n",
      "AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  customer_id,\n",
      "  first_name,\n",
      "  last_name,\n",
      "  age,\n",
      "  reservation_id,\n",
      "  status\n",
      "FROM (\n",
      "  SELECT\n",
      "    c.customer_id,\n",
      "    c.first_name,\n",
      "    c.last_name,\n",
      "    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\n",
      "    r.reservation_id,\n",
      "    r.status\n",
      "  FROM flight_reservations.customers c\n",
      "  JOIN flight_reservations.reservations r\n",
      "  ON c.customer_id = r.customer_id\n",
      "  WHERE r.status = 'Confirmed'\n",
      "  AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\n",
      "  AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\n",
      "  AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\n",
      ") AS t\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  customer_id,\n",
      "  first_name,\n",
      "  last_name,\n",
      "  age,\n",
      "  reservation_id,\n",
      "  status\n",
      "FROM (\n",
      "  SELECT\n",
      "    c.customer_id,\n",
      "    c.first_name,\n",
      "    c.last_name,\n",
      "    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\n",
      "    r.reservation_id,\n",
      "    r.status\n",
      "  FROM flight_reservations.customers c\n",
      "  JOIN flight_reservations.reservations r\n",
      "    ON c.customer_id = r.customer_id\n",
      "  WHERE r.status = 'Confirmed'\n",
      "  AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\n",
      "  AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\n",
      "  AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\n",
      ") AS t\n",
      "ORDER BY age DESC\n",
      "LIMIT 1000;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 304 ms, sys: 86.2 ms, total: 390 ms\n",
      "Wall time: 1min 20s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\\n  r.reservation_id,\\n  r.status\\nFROM flight_reservations.customers c\\nJOIN flight_reservations.reservations r\\nON c.customer_id = r.customer_id\\nWHERE r.status = 'Confirmed'\\nAND r.reservation_datetime &gt;= DATE_TRUNC(CURRENT_DATE(), MONTH)\\nAND r.reservation_datetime &lt; DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\nAND c.date_of_birth &lt; DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\nORDER BY age DESC;</td>\n",
       "      <td>customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed</td>\n",
       "      <td>14.337099</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  customer_id,\\n  first_name,\\n  last_name,\\n  age,\\n  reservation_id,\\n  status\\nFROM (\\n  SELECT\\n    c.customer_id,\\n    c.first_name,\\n    c.last_name,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\\n    r.reservation_id,\\n    r.status\\n  FROM flight_reservations.customers c\\n  JOIN flight_reservations.reservations r\\n  ON c.customer_id = r.customer_id\\n  WHERE r.status = 'Confirmed'\\n  AND r.reservation_datetime &gt;= DATE_TRUNC(CURRENT_DATE(), MONTH)\\n  AND r.reservation_datetime &lt; DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\n  AND c.date_of_birth &lt; DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\n) AS t\\nORDER BY age DESC;</td>\n",
       "      <td>customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed</td>\n",
       "      <td>14.847290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  customer_id,\\n  first_name,\\n  last_name,\\n  age,\\n  reservation_id,\\n  status\\nFROM (\\n  SELECT\\n    c.customer_id,\\n    c.first_name,\\n    c.last_name,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\\n    r.reservation_id,\\n    r.status\\n  FROM flight_reservations.customers c\\n  JOIN flight_reservations.reservations r\\n    ON c.customer_id = r.customer_id\\n  WHERE r.status = 'Confirmed'\\n  AND r.reservation_datetime &gt;= DATE_TRUNC(CURRENT_DATE(), MONTH)\\n  AND r.reservation_datetime &lt; DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\n  AND c.date_of_birth &lt; DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\n) AS t\\nORDER BY age DESC\\nLIMIT 1000;</td>\n",
       "      <td>customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed</td>\n",
       "      <td>15.433134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age,\\n  r.reservation_id,\\n  r.status\\nFROM flight_reservations.customers c\\nJOIN flight_reservations.reservations r\\nON c.customer_id = r.customer_id\\nWHERE r.status = 'Confirmed'\\nAND r.reservation_datetime &gt;= DATE_TRUNC(CURRENT_DATE(), MONTH)\\nAND r.reservation_datetime &lt; DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\nAND c.date_of_birth &lt; DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\nORDER BY age DESC;</td>\n",
       "      <td>customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed</td>\n",
       "      <td>20.638619</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Query                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Result    Latency\n",
       "0                                                                                                                                                      SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\\n  r.reservation_id,\\n  r.status\\nFROM flight_reservations.customers c\\nJOIN flight_reservations.reservations r\\nON c.customer_id = r.customer_id\\nWHERE r.status = 'Confirmed'\\nAND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\\nAND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\nAND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\nORDER BY age DESC;     customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed  14.337099\n",
       "1                SELECT\\n  customer_id,\\n  first_name,\\n  last_name,\\n  age,\\n  reservation_id,\\n  status\\nFROM (\\n  SELECT\\n    c.customer_id,\\n    c.first_name,\\n    c.last_name,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\\n    r.reservation_id,\\n    r.status\\n  FROM flight_reservations.customers c\\n  JOIN flight_reservations.reservations r\\n  ON c.customer_id = r.customer_id\\n  WHERE r.status = 'Confirmed'\\n  AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\\n  AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\n  AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\n) AS t\\nORDER BY age DESC;     customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed  14.847290\n",
       "2  SELECT\\n  customer_id,\\n  first_name,\\n  last_name,\\n  age,\\n  reservation_id,\\n  status\\nFROM (\\n  SELECT\\n    c.customer_id,\\n    c.first_name,\\n    c.last_name,\\n    EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM c.date_of_birth) AS age,\\n    r.reservation_id,\\n    r.status\\n  FROM flight_reservations.customers c\\n  JOIN flight_reservations.reservations r\\n    ON c.customer_id = r.customer_id\\n  WHERE r.status = 'Confirmed'\\n  AND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\\n  AND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\n  AND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\n) AS t\\nORDER BY age DESC\\nLIMIT 1000;     customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed  15.433134\n",
       "3                                                                                                                                                                            SELECT\\n  c.customer_id,\\n  c.first_name,\\n  c.last_name,\\n  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age,\\n  r.reservation_id,\\n  r.status\\nFROM flight_reservations.customers c\\nJOIN flight_reservations.reservations r\\nON c.customer_id = r.customer_id\\nWHERE r.status = 'Confirmed'\\nAND r.reservation_datetime >= DATE_TRUNC(CURRENT_DATE(), MONTH)\\nAND r.reservation_datetime < DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH)\\nAND c.date_of_birth < DATE_SUB(CURRENT_DATE(), INTERVAL 18 YEAR)\\nORDER BY age DESC;     customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed  20.638619"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>age</th>\n",
       "      <th>reservation_id</th>\n",
       "      <th>status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>17</td>\n",
       "      <td>Olivia</td>\n",
       "      <td>Newton</td>\n",
       "      <td>75</td>\n",
       "      <td>17</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>16</td>\n",
       "      <td>Nick</td>\n",
       "      <td>Fury</td>\n",
       "      <td>58</td>\n",
       "      <td>16</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13</td>\n",
       "      <td>Kate</td>\n",
       "      <td>Winslet</td>\n",
       "      <td>48</td>\n",
       "      <td>14</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20</td>\n",
       "      <td>Ryan</td>\n",
       "      <td>Reynolds</td>\n",
       "      <td>47</td>\n",
       "      <td>20</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>Mary</td>\n",
       "      <td>Jane</td>\n",
       "      <td>30</td>\n",
       "      <td>15</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>18</td>\n",
       "      <td>Peter</td>\n",
       "      <td>Parker</td>\n",
       "      <td>22</td>\n",
       "      <td>18</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".bq-sql-agent",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
